[Event Hub] PartitionResolver Jenkin3 optimizations #50068

danielmarbach · 2025-05-14T21:15:33Z

As discussed with @jsquire, this optimizes the Jenkins lookup3 hash computation in the partition key resolver. I kept the hashing logic itself as close to the original as possible.

Rewrote the main 12-byte mix loop to use a single ref byte pointer + Unsafe.ReadUnaligned<uint> instead of Span<byte>.Slice or indexed access
Streamlined tail handling (0–11 bytes) with unaligned loads and pointer arithmetic, dropping all remaining bounds checks
Removed all intermediate ReadOnlySpan<uint> casts and allocations in the loop, preserving exact bit-for-bit Jenkins lookup3 output

Results

Benchmark

using System;
using System.Buffers.Binary;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
using System.Text;
using BenchmarkDotNet.Attributes;
using BenchmarkDotNet.Configs;
using BenchmarkDotNet.Diagnosers;
using BenchmarkDotNet.Exporters;
using BenchmarkDotNet.Jobs;

namespace MicroBenchmarks.EventHubs;

/// <summary>
/// Benchmark harness that runs the three <c>GenerateHashCode</c> implementations
/// (<see cref="ComputeHashBefore"/>, <see cref="ComputeHashV1"/>, <see cref="ComputeHashV2"/>)
/// over the same input buffer for a range of key sizes.
/// </summary>
[Config(typeof(Config))]
public class ComputeHash
{
    // Key bytes shared by every benchmark method; rebuilt in Setup for each Size value.
    private byte[] _keyBytes;

    /// <summary>
    /// Length, in characters, of the generated key. The key consists solely of the
    /// ASCII character 'a', so its UTF-8 encoding has the same number of bytes.
    /// </summary>
    [Params(8, 12, 24, 32, 64, 128, 255)]
    public int Size { get; set; }

    /// <summary>Builds the key buffer for the current <see cref="Size"/>.</summary>
    [GlobalSetup]
    public void Setup() => _keyBytes = Encoding.UTF8.GetBytes(new string('a', Size));

    /// <summary>Baseline measurement: the span-slicing implementation.</summary>
    [Benchmark(Baseline = true)]
    public short Current() => ComputeHashBefore.GenerateHashCode(_keyBytes);

    /// <summary>Candidate 1: block loop over a <c>ReadOnlySpan&lt;uint&gt;</c> view of the key.</summary>
    [Benchmark]
    public short V1() => ComputeHashV1.GenerateHashCode(_keyBytes);

    /// <summary>Candidate 2: block loop and tail driven by a <c>ref byte</c> cursor.</summary>
    [Benchmark]
    public short V2() => ComputeHashV2.GenerateHashCode(_keyBytes);

    /// <summary>
    /// Run configuration: GitHub-flavoured markdown export, memory diagnoser, default job.
    /// </summary>
    private class Config : ManualConfig
    {
        public Config()
        {
            AddExporter(MarkdownExporter.GitHub);
            AddDiagnoser(MemoryDiagnoser.Default);
            AddJob(Job.Default);
        }
    }
}

/// <summary>
/// Baseline implementation of the Jenkins lookup3 hash used by the benchmark.
/// Words are read by slicing the span and calling
/// <see cref="BinaryPrimitives.ReadUInt32LittleEndian(ReadOnlySpan{byte})"/>.
/// </summary>
public static class ComputeHashBefore
{
    /// <summary>
    /// Hashes <paramref name="partitionKey"/> with both seeds set to zero and folds the
    /// two resulting 32-bit values into a 16-bit hash code.
    /// </summary>
    /// <param name="partitionKey">The bytes to hash.</param>
    /// <returns>The low 16 bits of <c>hash1 ^ hash2</c>, reinterpreted as a <see cref="short"/>.</returns>
    public static short GenerateHashCode(byte[] partitionKey)
    {
        ComputeHash(partitionKey, seed1: 0, seed2: 0, out uint hash1, out uint hash2);
        return (short) (hash1 ^ hash2);
    }

    /// <summary>
    /// Computes two 32-bit hash values over <paramref name="data"/>.
    /// </summary>
    /// <param name="data">The bytes to hash; consumed 12 bytes at a time as three little-endian words.</param>
    /// <param name="seed1">Seed added to the initial value of all three state words.</param>
    /// <param name="seed2">Seed added additionally to the third state word.</param>
    /// <param name="hash1">Receives the final value of state word <c>c</c>.</param>
    /// <param name="hash2">Receives the final value of state word <c>b</c>.</param>
    private static void ComputeHash(ReadOnlySpan<byte> data,
        uint seed1,
        uint seed2,
        out uint hash1,
        out uint hash2)
    {
                uint a, b, c;

        // Initial state: all three words start from the same constant + length + seed1;
        // only c additionally receives seed2.
        a = b = c = (uint) (0xdeadbeef + data.Length + seed1);
        c += seed2;

        // Main loop: runs only while MORE than 12 bytes remain, so the last 1..12 bytes
        // are always left for the tail switch below.
        int index = 0, size = data.Length;
        while (size > 12)
        {
            a += BinaryPrimitives.ReadUInt32LittleEndian(data[index..]);
            b += BinaryPrimitives.ReadUInt32LittleEndian(data[(index + 4)..]);
            c += BinaryPrimitives.ReadUInt32LittleEndian(data[(index + 8)..]);

            // Mixing rounds. Each "(x << k) | (x >> (32 - k))" is a 32-bit left rotation by k;
            // the rounds are order-dependent and must not be rearranged.
            a -= c;
            a ^= (c << 4) | (c >> 28);
            c += b;

            b -= a;
            b ^= (a << 6) | (a >> 26);
            a += c;

            c -= b;
            c ^= (b << 8) | (b >> 24);
            b += a;

            a -= c;
            a ^= (c << 16) | (c >> 16);
            c += b;

            b -= a;
            b ^= (a << 19) | (a >> 13);
            a += c;

            c -= b;
            c ^= (b << 4) | (b >> 28);
            b += a;

            index += 12;
            size -= 12;
        }

        // Tail: fold the remaining 0..12 bytes into a (bytes 0-3), b (bytes 4-7) and
        // c (bytes 8-11). Partial words are assembled byte by byte in little-endian
        // order, cascading downwards through the cases via "goto case".
        switch (size)
        {
            case 12:
                a += BinaryPrimitives.ReadUInt32LittleEndian(data[index..]);
                b += BinaryPrimitives.ReadUInt32LittleEndian(data[(index + 4)..]);
                c += BinaryPrimitives.ReadUInt32LittleEndian(data[(index + 8)..]);
                break;
            case 11:
                c += ((uint) data[index + 10]) << 16;
                goto case 10;
            case 10:
                c += ((uint) data[index + 9]) << 8;
                goto case 9;
            case 9:
                c += (uint) data[index + 8];
                goto case 8;
            case 8:
                b += BinaryPrimitives.ReadUInt32LittleEndian(data[(index + 4)..]);
                a += BinaryPrimitives.ReadUInt32LittleEndian(data[index..]);
                break;
            case 7:
                b += ((uint) data[index + 6]) << 16;
                goto case 6;
            case 6:
                b += ((uint) data[index + 5]) << 8;
                goto case 5;
            case 5:
                b += (uint) data[index + 4];
                goto case 4;
            case 4:
                a += BinaryPrimitives.ReadUInt32LittleEndian(data[index..]);
                break;
            case 3:
                a += ((uint) data[index + 2]) << 16;
                goto case 2;
            case 2:
                a += ((uint) data[index + 1]) << 8;
                goto case 1;
            case 1:
                a += (uint) data[index];
                break;
            case 0:
                // Only reachable for empty input: the state is returned as-is,
                // skipping the final mixing rounds.
                hash1 = c;
                hash2 = b;
                return;
        }

        // Final mixing of the three state words (xor, then subtract a left rotation).
        c ^= b;
        c -= (b << 14) | (b >> 18);

        a ^= c;
        a -= (c << 11) | (c >> 21);

        b ^= a;
        b -= (a << 25) | (a >> 7);

        c ^= b;
        c -= (b << 16) | (b >> 16);

        a ^= c;
        a -= (c << 4) | (c >> 28);

        b ^= a;
        b -= (a << 14) | (a >> 18);

        c ^= b;
        c -= (b << 24) | (b >> 8);

        hash1 = c;
        hash2 = b;
    }
}

/// <summary>
/// First optimization candidate for the Jenkins lookup3 hash: the leading whole
/// 12-byte blocks are viewed as a <c>ReadOnlySpan&lt;uint&gt;</c> and consumed three
/// words at a time; the trailing 0..12 bytes are handled separately.
/// </summary>
public static class ComputeHashV1
{
    /// <summary>
    /// Hashes <paramref name="partitionKey"/> with both seeds set to zero and folds the
    /// two 32-bit results into a 16-bit hash code.
    /// </summary>
    /// <param name="partitionKey">The bytes to hash.</param>
    /// <returns>The low 16 bits of the xor of both hash words, as a <see cref="short"/>.</returns>
    public static short GenerateHashCode(byte[] partitionKey)
    {
        ComputeHash(partitionKey, 0, 0, out uint primary, out uint secondary);
        return (short)(primary ^ secondary);
    }

    /// <summary>
    /// Computes two 32-bit hash values over <paramref name="data"/>.
    /// </summary>
    /// <param name="data">The bytes to hash.</param>
    /// <param name="seed1">Seed folded into the initial value of all three state words.</param>
    /// <param name="seed2">Seed folded additionally into the third state word.</param>
    /// <param name="hash1">Receives the final value of state word <c>c</c>.</param>
    /// <param name="hash2">Receives the final value of state word <c>b</c>.</param>
    private static void ComputeHash(ReadOnlySpan<byte> data,
        uint seed1,
        uint seed2,
        out uint hash1,
        out uint hash2)
    {
        uint a = 0xDEADBEEF + (uint)data.Length + seed1;
        uint b = a;
        uint c = a + seed2;

        // Number of full 12-byte blocks that are mixed in the loop. The "- 1" keeps the
        // last 1..12 bytes out of the loop so that they always go through the tail switch.
        int blockCount = data.Length > 12 ? (data.Length - 1) / 12 : 0;
        int blockBytes = blockCount * 12;

        // blockBytes is a multiple of 12 and therefore of 4, so the view holds exactly
        // three words per block.
        ReadOnlySpan<uint> words = MemoryMarshal.Cast<byte, uint>(data.Slice(0, blockBytes));

        for (int w = 0; w < words.Length; w += 3)
        {
            a += FromLittleEndian(words[w]);
            b += FromLittleEndian(words[w + 1]);
            c += FromLittleEndian(words[w + 2]);

            Mix(ref a, ref b, ref c);
        }

        // Tail: bytes 0-3 feed a, bytes 4-7 feed b, bytes 8-11 feed c. Partial words are
        // assembled byte by byte in little-endian order; every case cascades down to the
        // complete words below it.
        ReadOnlySpan<byte> tail = data.Slice(blockBytes);

        switch (tail.Length)
        {
            case 0:
                // Empty input: the state is returned without the final mixing rounds.
                hash1 = c;
                hash2 = b;
                return;

            case 12:
                c += BinaryPrimitives.ReadUInt32LittleEndian(tail.Slice(8));
                goto case 8;
            case 11:
                c += (uint)tail[10] << 16;
                goto case 10;
            case 10:
                c += (uint)tail[9] << 8;
                goto case 9;
            case 9:
                c += tail[8];
                goto case 8;

            case 8:
                b += BinaryPrimitives.ReadUInt32LittleEndian(tail.Slice(4));
                goto case 4;
            case 7:
                b += (uint)tail[6] << 16;
                goto case 6;
            case 6:
                b += (uint)tail[5] << 8;
                goto case 5;
            case 5:
                b += tail[4];
                goto case 4;

            case 4:
                a += BinaryPrimitives.ReadUInt32LittleEndian(tail);
                break;
            case 3:
                a += (uint)tail[2] << 16;
                goto case 2;
            case 2:
                a += (uint)tail[1] << 8;
                goto case 1;
            case 1:
                a += tail[0];
                break;
        }

        FinalMix(ref a, ref b, ref c);

        hash1 = c;
        hash2 = b;
    }

    /// <summary>Returns <paramref name="word"/> as read from little-endian memory, byte-swapping on big-endian hosts.</summary>
    private static uint FromLittleEndian(uint word) =>
        BitConverter.IsLittleEndian ? word : BinaryPrimitives.ReverseEndianness(word);

    /// <summary>Rotates <paramref name="value"/> left by <paramref name="bits"/> bits (1..31).</summary>
    private static uint RotateLeft(uint value, int bits) =>
        (value << bits) | (value >> (32 - bits));

    /// <summary>Applies the six order-dependent mixing rounds used after each 12-byte block.</summary>
    private static void Mix(ref uint a, ref uint b, ref uint c)
    {
        a -= c; a ^= RotateLeft(c, 4);  c += b;
        b -= a; b ^= RotateLeft(a, 6);  a += c;
        c -= b; c ^= RotateLeft(b, 8);  b += a;
        a -= c; a ^= RotateLeft(c, 16); c += b;
        b -= a; b ^= RotateLeft(a, 19); a += c;
        c -= b; c ^= RotateLeft(b, 4);  b += a;
    }

    /// <summary>Applies the seven final mixing rounds to the state words.</summary>
    private static void FinalMix(ref uint a, ref uint b, ref uint c)
    {
        c ^= b; c -= RotateLeft(b, 14);
        a ^= c; a -= RotateLeft(c, 11);
        b ^= a; b -= RotateLeft(a, 25);
        c ^= b; c -= RotateLeft(b, 16);
        a ^= c; a -= RotateLeft(c, 4);
        b ^= a; b -= RotateLeft(a, 14);
        c ^= b; c -= RotateLeft(b, 24);
    }
}

/// <summary>
/// Second optimization candidate for the Jenkins lookup3 hash: both the 12-byte block
/// loop and the tail are driven by a <c>ref byte</c> cursor with unaligned 32-bit loads,
/// so no span slicing or indexing (and therefore no bounds checks) is involved.
/// </summary>
public static class ComputeHashV2
{
    /// <summary>
    /// Hashes <paramref name="partitionKey"/> with both seeds set to zero and folds the
    /// two 32-bit results into a 16-bit hash code.
    /// </summary>
    /// <param name="partitionKey">The bytes to hash.</param>
    /// <returns>The low 16 bits of <c>hash1 ^ hash2</c>, reinterpreted as a <see cref="short"/>.</returns>
    public static short GenerateHashCode(byte[] partitionKey)
    {
        ComputeHash(partitionKey, seed1: 0, seed2: 0, out uint hash1, out uint hash2);

        // The narrowing is an intentional truncation; "unchecked" keeps it from throwing
        // when the project is compiled with overflow checking enabled.
        return unchecked((short)(hash1 ^ hash2));
    }

    /// <summary>
    /// Computes two 32-bit hash values over <paramref name="data"/>.
    /// </summary>
    /// <param name="data">The bytes to hash; consumed 12 bytes at a time as three little-endian words.</param>
    /// <param name="seed1">Seed folded into the initial value of all three state words.</param>
    /// <param name="seed2">Seed folded additionally into the third state word.</param>
    /// <param name="hash1">Receives the final value of state word <c>c</c>.</param>
    /// <param name="hash2">Receives the final value of state word <c>b</c>.</param>
    private static void ComputeHash(ReadOnlySpan<byte> data,
        uint seed1,
        uint seed2,
        out uint hash1,
        out uint hash2)
    {
        // All arithmetic below relies on 32-bit wraparound, so it is made explicitly
        // unchecked instead of depending on the project's overflow-checking setting.
        unchecked
        {
            uint a = 0xDEADBEEF + (uint)data.Length + seed1;
            uint b = a;
            uint c = a + seed2;

            // Number of full 12-byte blocks mixed in the loop. The "- 1" keeps the last
            // 1..12 bytes out of the loop so that they always go through the tail switch.
            int chunks = data.Length > 12 ? (data.Length - 1) / 12 : 0;

            // The loop never reads past chunks * 12 bytes, which is strictly less than
            // data.Length, so the unchecked reads stay inside the span.
            ref byte cursor = ref MemoryMarshal.GetReference(data);
            for (int i = 0; i < chunks; i++)
            {
                a += ReadUInt32LittleEndian(ref cursor);
                b += ReadUInt32LittleEndian(ref Unsafe.Add(ref cursor, 4));
                c += ReadUInt32LittleEndian(ref Unsafe.Add(ref cursor, 8));
                cursor = ref Unsafe.Add(ref cursor, 12);

                // Mixing rounds. Each "(x << k) | (x >> (32 - k))" is a 32-bit left rotation
                // by k; the rounds are order-dependent and must not be rearranged.
                a -= c;
                a ^= (c << 4) | (c >> 28);
                c += b;

                b -= a;
                b ^= (a << 6) | (a >> 26);
                a += c;

                c -= b;
                c ^= (b << 8) | (b >> 24);
                b += a;

                a -= c;
                a ^= (c << 16) | (c >> 16);
                c += b;

                b -= a;
                b ^= (a << 19) | (a >> 13);
                a += c;

                c -= b;
                c ^= (b << 4) | (b >> 28);
                b += a;
            }

            // After the loop the cursor already points at the first unconsumed byte,
            // so it doubles as the start of the tail.
            ref byte tail = ref cursor;
            int left = data.Length - (chunks * 12);

            // Tail: bytes 0-3 feed a, bytes 4-7 feed b, bytes 8-11 feed c. Partial words
            // are assembled byte by byte in little-endian order, cascading via "goto case".
            switch (left)
            {
                case 12:
                    a += ReadUInt32LittleEndian(ref tail);
                    b += ReadUInt32LittleEndian(ref Unsafe.Add(ref tail, 4));
                    c += ReadUInt32LittleEndian(ref Unsafe.Add(ref tail, 8));
                    break;
                case 11:
                    c += (uint)Unsafe.Add(ref tail, 10) << 16;
                    goto case 10;
                case 10:
                    c += (uint)Unsafe.Add(ref tail, 9) << 8;
                    goto case 9;
                case 9:
                    c += Unsafe.Add(ref tail, 8);
                    goto case 8;
                case 8:
                    b += ReadUInt32LittleEndian(ref Unsafe.Add(ref tail, 4));
                    a += ReadUInt32LittleEndian(ref tail);
                    break;
                case 7:
                    b += (uint)Unsafe.Add(ref tail, 6) << 16;
                    goto case 6;
                case 6:
                    b += (uint)Unsafe.Add(ref tail, 5) << 8;
                    goto case 5;
                case 5:
                    b += Unsafe.Add(ref tail, 4);
                    goto case 4;
                case 4:
                    a += ReadUInt32LittleEndian(ref tail);
                    break;
                case 3:
                    a += (uint)Unsafe.Add(ref tail, 2) << 16;
                    goto case 2;
                case 2:
                    a += (uint)Unsafe.Add(ref tail, 1) << 8;
                    goto case 1;
                case 1:
                    a += tail;
                    break;
                case 0:
                    // Only reachable for empty input: the state is returned as-is,
                    // skipping the final mixing rounds. The tail reference is never read.
                    hash1 = c;
                    hash2 = b;
                    return;
            }

            // Final mixing of the three state words (xor, then subtract a left rotation).
            c ^= b;
            c -= (b << 14) | (b >> 18);

            a ^= c;
            a -= (c << 11) | (c >> 21);

            b ^= a;
            b -= (a << 25) | (a >> 7);

            c ^= b;
            c -= (b << 16) | (b >> 16);

            a ^= c;
            a -= (c << 4) | (c >> 28);

            b ^= a;
            b -= (a << 14) | (a >> 18);

            c ^= b;
            c -= (b << 24) | (b >> 8);

            hash1 = c;
            hash2 = b;
        }
    }

    /// <summary>
    /// Reads four bytes starting at <paramref name="source"/> as a little-endian
    /// <see cref="uint"/> without requiring alignment; the value is byte-swapped on
    /// big-endian hosts. The caller must guarantee that four bytes are readable.
    /// </summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    private static uint ReadUInt32LittleEndian(ref byte source)
    {
        uint value = Unsafe.ReadUnaligned<uint>(ref source);
        return BitConverter.IsLittleEndian ? value : BinaryPrimitives.ReverseEndianness(value);
    }
}

github-actions · 2025-05-14T21:16:07Z

Thank you for your contribution @danielmarbach! We will review the pull request and get back to you soon.

Copilot

Pull Request Overview

This PR optimizes the Jenkins lookup3 hash computation in the partition key resolver by rewriting the 12-byte mix loop and streamlining tail handling.

Replaces slice/indexed access with a single ref byte pointer and Unsafe.ReadUnaligned calls
Streamlines tail processing using pointer arithmetic without bounds checks
Eliminates intermediate ReadOnlySpan casts and allocations

sdk/eventhub/Azure.Messaging.EventHubs/src/Core/PartitionResolver.cs

jsquire · 2025-05-16T23:28:08Z

Thanks, @danielmarbach! Impressive work, as usual. Not gonna lie, though, the unaligned reads do scare me a bit. Don't suppose that you were able to do any validation passes on any Apple silicon or ARM platforms? I confirmed our macOS test legs in the pipeline are Intel-based, so I'm going to set up an ARM test for Monday if you didn't happen to already do one.

danielmarbach · 2025-05-17T06:20:24Z

@jsquire I ran all the existing tests on my M3 macbook pro if that counts

jsquire · 2025-05-17T18:43:05Z

@jsquire I ran all the existing tests on my M3 macbook pro if that counts

It does indeed! If everything is passing on Apple silicon, I don't anticipate we'll run into any issues with ARM64 in Monday's pass.

jsquire

Looks like we've got test passes from runs on x64, ARM64, and Apple Silicon. Thanks, @danielmarbach!

Do you mind just throwing a change log entry in before we merge?

danielmarbach · 2025-05-19T19:21:57Z

@jsquire done

sdk/eventhub/Azure.Messaging.EventHubs/CHANGELOG.md

* Optimized Jenkin3 lookup version 1 * Optimized Jenkin3 lookup version 2 * Update CHANGELOG.md * Update sdk/eventhub/Azure.Messaging.EventHubs/CHANGELOG.md * Update sdk/eventhub/Azure.Messaging.EventHubs/CHANGELOG.md --------- Co-authored-by: Daniel Marbach <[email protected]> Co-authored-by: Jesse Squire <[email protected]>

Optimized Jenkin3 lookup version 1

037e7dc

Copilot AI review requested due to automatic review settings May 14, 2025 21:15

danielmarbach requested review from jsquire and m-redding as code owners May 14, 2025 21:15

github-actions bot added the Community Contribution Community members are working on the issue label May 14, 2025

github-actions bot added customer-reported Issues that are reported by GitHub users external to the Azure organization. Event Hubs labels May 14, 2025

Copilot AI reviewed May 14, 2025

View reviewed changes

sdk/eventhub/Azure.Messaging.EventHubs/src/Core/PartitionResolver.cs Outdated Show resolved Hide resolved

Optimized Jenkin3 lookup version 2

6b81a2d

danielmarbach force-pushed the partitioning branch from d0873e0 to 6b81a2d Compare May 14, 2025 21:25

jsquire approved these changes May 19, 2025

View reviewed changes

Update CHANGELOG.md

0b9e378

jsquire approved these changes May 19, 2025

View reviewed changes

sdk/eventhub/Azure.Messaging.EventHubs/CHANGELOG.md Outdated Show resolved Hide resolved

sdk/eventhub/Azure.Messaging.EventHubs/CHANGELOG.md Outdated Show resolved Hide resolved

jsquire added 2 commits May 19, 2025 14:05

Update sdk/eventhub/Azure.Messaging.EventHubs/CHANGELOG.md

3218e76

Update sdk/eventhub/Azure.Messaging.EventHubs/CHANGELOG.md

7085e2c

JoshLove-msft approved these changes May 19, 2025

View reviewed changes

jsquire merged commit 881b08b into Azure:main May 19, 2025
18 checks passed

danielmarbach deleted the partitioning branch May 20, 2025 06:27

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

[Event Hub] PartitionResolver Jenkin3 optimizations #50068

[Event Hub] PartitionResolver Jenkin3 optimizations #50068

Uh oh!

danielmarbach commented May 14, 2025 •

edited

Loading

Uh oh!

github-actions bot commented May 14, 2025

Uh oh!

Copilot AI left a comment

Uh oh!

Uh oh!

jsquire commented May 16, 2025

Uh oh!

danielmarbach commented May 17, 2025

Uh oh!

jsquire commented May 17, 2025

Uh oh!

jsquire left a comment

Uh oh!

danielmarbach commented May 19, 2025

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

[Event Hub] PartitionResolver Jenkin3 optimizations #50068

[Event Hub] PartitionResolver Jenkin3 optimizations #50068

Uh oh!

Conversation

danielmarbach commented May 14, 2025 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Results

Benchmark

Uh oh!

github-actions bot commented May 14, 2025

Uh oh!

Copilot AI left a comment

Choose a reason for hiding this comment

Pull Request Overview

Uh oh!

Uh oh!

jsquire commented May 16, 2025

Uh oh!

danielmarbach commented May 17, 2025

Uh oh!

jsquire commented May 17, 2025

Uh oh!

jsquire left a comment

Choose a reason for hiding this comment

Uh oh!

danielmarbach commented May 19, 2025

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

danielmarbach commented May 14, 2025 •

edited

Loading